* This script contains the code to replicate Table G of the online appendix in Ductor, L., Fafchamps, M., Goyal, S. and M. van der Leij. Social Networks and Research Output. The Review of Economics and Statistics.
/*Using productivity discounted by the number of authors and the article's length*/
/* Close any log left open by an earlier (possibly aborted) run; otherwise
   -log using- below stops the script because a log is already open.
   -capture- makes this a no-op when no log is open. */
capture log close

/* Turn off -more- pagination so the script runs unattended. */
set more off

/* Record all output in TableG.log, overwriting any previous copy. */
log using TableG.log, replace

/* Load the estimation data set, discarding whatever is in memory. */
use prodndnp_fullsample_dt.dta, clear

/*******OLS REGRESSIONS**************/

/*OLS out-of-sample program*/
cap pro drop ols_os
/*
  ols_os varlist [if]

  Fits a pooled OLS regression of the first variable in varlist on the
  remaining ones, restricted to the observations selected by [if], with
  standard errors clustered on auth. It then predicts the linear index for
  observations with group==2 and non-missing lprodf3dt and reports the
  out-of-sample RMSE of those predictions against lprodf3dt.

  Left behind for the caller:
    variable sqerrols  squared out-of-sample prediction error (the caller
                       must drop it before calling ols_os again)
    scalars  n1        estimation-sample size
             rmseols   in-sample RMSE, e(rmse)
             r2i1ols   in-sample R-squared, e(r2)
             rssols    sum of squared out-of-sample errors
             nols      number of out-of-sample observations with a prediction
             rmseo2ols out-of-sample RMSE

  Scalars are read back through scalar(): a bare name in an expression is
  resolved to a variable (or variable abbreviation) before a scalar, so a
  data set containing, say, a variable called nols would silently change the
  reported RMSE.
*/
program define ols_os
    syntax varlist(numeric min=1) [if]
    marksample touse

    /* In-sample fit */
    reg `varlist' if `touse', vce(cluster auth)
    qui scalar n1=e(N)
    estat ic
    scalar rmseols=e(rmse)

    /* Out-of-sample linear prediction; e() results are still those of -reg- */
    predict ybols if group==2 & lprodf3dt<., xb
    scalar r2i1ols= e(r2)

    /* Squared prediction errors and their total over the predicted observations */
    qui gen sqerrols= (lprodf3dt-ybols)^2 if ybols<.
    quietly egen peols=sum(sqerrols) if ybols<.

    /* peols is constant over the predicted observations, so r(max) is the
       total and r(N) is the number of predicted observations */
    quietly sum peols
    quietly scalar rssols=r(max)
    quietly scalar nols=r(N)

    scalar rmseo2ols=(scalar(rssols)/scalar(nols))^0.5
    di scalar(n1) " Number of observations in the in-sample group"
    di scalar(rmseo2ols) "  Pooled OLS out-of-sample RMSE"
    di scalar(nols) " Number of observations in the out-of-sample group"

    /* Working variables are removed; sqerrols is kept for the caller */
    drop peols ybols
end

/*MODEL 0: lprodf3dt on y2-y27, nopapers and t2-t27 only*/

/* In-sample fit on group 1 */
reg lprodf3dt y2-y27 nopapers t2-t27 if group==1
qui scalar n1=e(N)   /*estimation-sample size*/
estat ic
scalar rmseols=e(rmse)   /*in-sample RMSE*/

/* Linear prediction for group 2 observations with an observed outcome */
predict ybols if group==2 & lprodf3dt<., xb
scalar r2i1ols= e(r2)   /*in-sample R2 (e() still holds the -reg- results)*/

/* Squared out-of-sample errors; sqerrolsM0 is left in the data set */
qui gen sqerrolsM0= (lprodf3dt-ybols)^2 if ybols<.
quietly egen peols=sum(sqerrolsM0) if ybols<.   /*total of the squared errors, constant over predicted observations*/
quietly sum peols
quietly scalar rssols=r(max)   /*sum of squared out-of-sample errors*/
quietly scalar nols=r(N)       /*number of predicted observations in group 2*/

/* scalar() forces the scalar to be used: a bare name would be resolved to a
   variable (or variable abbreviation) of the same name first */
scalar rmseo2ols=(scalar(rssols)/scalar(nols))^0.5   /*RMSE out of sample*/
di scalar(n1) " Number of observations in the in-sample group"
di scalar(rmseo2ols) "  Pooled OLS out-of-sample RMSE"
di scalar(nols) " Number of observations in the out-of-sample group"
drop peols ybols


/*MODEL 1: Model 0 regressors plus lprodf3ldt-lprodf3l14dt*/

/* In-sample fit on group 1 */
reg lprodf3dt y2-y27 nopapers t2-t27 lprodf3ldt-lprodf3l14dt if group==1

/* Flag the Model 1 estimation sample; later specifications are fitted with -if e==1- */
gen e=1 if e(sample)
qui scalar n1=e(N)   /*estimation-sample size*/
estat ic
scalar rmseols=e(rmse)   /*in-sample RMSE*/

/* Linear prediction for group 2 observations with an observed outcome */
predict ybols if group==2 & lprodf3dt<., xb
scalar r2i1ols= e(r2)   /*in-sample R2 (e() still holds the -reg- results)*/

/* Squared out-of-sample errors; sqerrolsM1 is kept as the benchmark for the
   Diebold-Mariano comparisons further down */
qui gen sqerrolsM1= (lprodf3dt-ybols)^2 if ybols<.
quietly egen peols=sum(sqerrolsM1) if ybols<.   /*total of the squared errors, constant over predicted observations*/
quietly sum peols
quietly scalar rssols=r(max)   /*sum of squared out-of-sample errors*/
quietly scalar nols=r(N)       /*number of predicted observations in group 2*/

/* scalar() forces the scalar to be used: a bare name would be resolved to a
   variable (or variable abbreviation) of the same name first */
scalar rmseo2ols=(scalar(rssols)/scalar(nols))^0.5   /*RMSE out of sample*/
di scalar(n1) " Number of observations in the in-sample group"
di scalar(rmseo2ols) "  Pooled OLS out-of-sample RMSE"
di scalar(nols) " Number of observations in the out-of-sample group"
drop peols ybols

/**********MODEL 3. OLS regressions: Model 1 regressors plus network variables**********/

/* Dependent variable and the regressors common to every Model 3 specification.
   NOTE: these are local macros, so this section must be run as one piece. */
local m3base "lprodf3dt y2-y27 nopapers t2-t27 lprodf3ldt-lprodf3l14dt"

/* Network regressors added by each specification, in the order they are run */

/* 1. Coauthors' productivity */
local m3net1 "lnetproddt1y-lnetproddt12y"

/* 2. Coauthors' coauthors productivity (predictions are almost equivalent) */
local m3net2 "lnetprod2dt1y-lnetprod2dt12y"

/* 3. Degree */
local m3net3 "degree1y-degree4y"

/* 4. Degree2 */
local m3net4 "degree21y-degree25y"

/* 5. GC */
local m3net5 "gc1y-gc8y"

/* 6. BET */
local m3net6 "gc1y-gc9y lbet1y-lbet9y"

/* 7. CLOS */
local m3net7 "gc1y-gc7y lclos1y-lclos8y"

/* 8. Coauthors Top Dummy variable */
local m3net8 "neiq1fsdt1y-neiq1fsdt12y"

/* 9. Multivariate Model 3 */
local m3net9 "lnetproddt1y-lnetproddt8y lnetprod2dt1y-lnetprod2dt8y degree1y-degree8y degree21y-degree28y gc1y-gc8y lbet1y-lbet8y lclos1y-lclos8y neiq1fsdt1y-neiq1fsdt8y"

forvalues s = 1/9 {
    /* Fit on the Model 1 estimation sample and compute out-of-sample squared errors */
    ols_os `m3base' `m3net`s'' if e==1

    /* Diebold-Mariano test: squared-error differential relative to Model 1 */
    gen diffsqerrlM3=sqerrols-sqerrolsM1
    newey2 diffsqerrlM3 if group==2 & lprodf3dt<., lag(14)

    /* Clear the per-specification variables so the next pass can recreate them */
    drop sqerrols diffsqerrlM3
}

/*****Model 2*****/

/*Coauthors productivity: lnetproddt1y through lnetproddt12y, listed one by one.
  The second regressor was written lnetprod2dty, which breaks the
  lnetproddt1y ... lnetproddt12y sequence; it is corrected to lnetproddt2y.*/
ols_os lprodf3dt y2-y27 nopapers t2-t27  lnetproddt1y lnetproddt2y lnetproddt3y lnetproddt4y lnetproddt5y lnetproddt6y lnetproddt7y lnetproddt8y lnetproddt9y lnetproddt10y lnetproddt11y lnetproddt12y if e==1
/*Diebold-Mariano test: squared-error differential relative to Model 1*/
gen diffsqerrlM2=sqerrols-sqerrolsM1
newey2 diffsqerrlM2 if group==2 & lprodf3dt<., lag(12)
/*Remove the per-specification variables so the next specification can recreate them*/
drop sqerrols diffsqerrlM2

/* Remaining Model 2 specifications. Each one pairs a set of network regressors
   with the lag length passed to newey2 for its Diebold-Mariano test.
   NOTE: these are local macros, so this section must be run as one piece. */
local m2base "lprodf3dt y2-y27 nopapers t2-t27"

/* 1. Coauthors' coauthors productivity */
local m2net1 "lnetprod2dt1y lnetprod2dt2y lnetprod2dt3y lnetprod2dt4y lnetprod2dt5y lnetprod2dt6y lnetprod2dt7y lnetprod2dt8y lnetprod2dt9y lnetprod2dt10y lnetprod2dt11y lnetprod2dt12y-lnetprod2dt15y"
local m2lag1 15

/* 2. Degree */
local m2net2 "degree1y degree2y degree3y degree4y degree5y degree6y degree7y degree8y degree9y degree10y degree11y degree12y-degree15y"
local m2lag2 15

/* 3. Degree 2 */
local m2net3 "degree21y degree22y degree23y degree24y degree25y degree26y degree27y degree28y degree29y degree210y degree211y degree212y degree213y degree214y"
local m2lag3 14

/* 4. Giant component */
local m2net4 "gc1y gc2y-gc15y"
local m2lag4 15

/* 5. Betweenness */
local m2net5 "gc1y gc2y-gc15y lbet1y-lbet15y"
local m2lag5 15

/* 6. Closeness */
local m2net6 "gc1y gc2y-gc15y lclos1y-lclos15y"
local m2lag6 15

/* 7. Coauthors top 1 dummy variable */
local m2net7 "neiq1fsdt1y neiq1fsdt2y neiq1fsdt3y neiq1fsdt4y neiq1fsdt5y neiq1fsdt6y neiq1fsdt7y neiq1fsdt8y neiq1fsdt9y neiq1fsdt10y neiq1fsdt11y neiq1fsdt12y neiq1fsdt13y neiq1fsdt14y"
local m2lag7 14

/* 8. MV Model 2 */
local m2net8 "lnetproddt1y-lnetproddt15y lnetprod2dt1y-lnetprod2dt15y degree1y-degree15y degree21y-degree215y gc1y-gc15y lbet1y-lbet15y lclos1y-lclos15y neiq1fsdt1y-neiq1fsdt15y"
local m2lag8 15

forvalues s = 1/8 {
    /* Fit on the Model 1 estimation sample and compute out-of-sample squared errors */
    ols_os `m2base' `m2net`s'' if e==1

    /* Diebold-Mariano test: squared-error differential relative to Model 1 */
    gen diffsqerrlM2=sqerrols-sqerrolsM1
    newey2 diffsqerrlM2 if group==2 & lprodf3dt<., lag(`m2lag`s'')

    /* Clear the per-specification variables so the next pass can recreate them */
    drop sqerrols diffsqerrlM2
}


log close

